# -*- coding: utf-8 -*-
import pandas as pd
from PIL import Image
from os import path

def checkImg(obj):
    """Return True if the image file for a sample can be opened by PIL.

    The image is looked up at ``/home/zhengpeng/fakeddit/images/<id>.jpg``,
    where ``<id>`` is ``obj["id"]``.

    Args:
        obj: Mapping-like record (for example a DataFrame row passed by
            ``DataFrame.apply(..., axis=1)``) with a string ``"id"`` entry.

    Returns:
        bool: True when ``Image.open`` succeeds on the file. False when the
        path cannot be built (missing or non-string id) or the file cannot
        be opened as an image.
    """
    try:
        file_path = path.join("/home/zhengpeng/fakeddit/", 'images/', obj["id"] + ".jpg")
        # Image.open leaves the underlying file open; the context manager
        # closes it right away instead of leaving that to garbage collection.
        with Image.open(file_path):
            pass
    except Exception:
        # Deliberately broad: any ordinary failure means "no usable image".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate so a long scan can still be aborted.
        return False
    return True

# Load the tab-separated sample table.
# NOTE: the variable is called train_data, but the file name indicates the
# public test split.
train_data = pd.read_csv(
    '/home/zhengpeng/fakeddit/multimodal_only_samples/multimodal_test_public.tsv',
    sep='\t',
)
# Debug aid: for a quick trial run, slice train_data down to its first 1% of
# rows at this point before filtering.

# Row-wise boolean mask: True for rows that checkImg accepts.
has_image = train_data.apply(checkImg, axis=1)
train_data = train_data[has_image]

# Save the surviving rows; the DataFrame index is written as the first column
# because to_csv is called without index=False.
train_data.to_csv(
    '/home/zhengpeng/fakeddit/multimodal_only_samples/fix_test.tsv',
    sep='\t',
)
print(train_data.shape)